Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
Attaching package: ‘plotly’
The following object is masked from ‘package:ggplot2’:
last_plot
The following object is masked from ‘package:stats’:
filter
The following object is masked from ‘package:graphics’:
layout
Registered S3 methods overwritten by 'dbplyr':
method from
print.tbl_lazy
print.tbl_sql
Registered S3 method overwritten by 'cli':
method from
print.boxx spatstat
[37m── [1mAttaching packages[22m ─────────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──[39m
[37m[32m✓[37m [34mtibble [37m 3.0.6 [32m✓[37m [34mdplyr [37m 1.0.4
[32m✓[37m [34mtidyr [37m 1.1.2 [32m✓[37m [34mstringr[37m 1.4.0
[32m✓[37m [34mreadr [37m 1.4.0 [32m✓[37m [34mforcats[37m 0.5.1
[32m✓[37m [34mpurrr [37m 0.3.4 [39m
[37m── [1mConflicts[22m ────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
[31mx[37m [34mdplyr[37m::[32mbetween()[37m masks [34mdata.table[37m::between()
[31mx[37m [34mtidyr[37m::[32mexpand()[37m masks [34mMatrix[37m::expand()
[31mx[37m [34mdplyr[37m::[32mfilter()[37m masks [34mplotly[37m::filter(), [34mstats[37m::filter()
[31mx[37m [34mdplyr[37m::[32mfirst()[37m masks [34mdata.table[37m::first()
[31mx[37m [34mdplyr[37m::[32mlag()[37m masks [34mstats[37m::lag()
[31mx[37m [34mdplyr[37m::[32mlast()[37m masks [34mdata.table[37m::last()
[31mx[37m [34mtidyr[37m::[32mpack()[37m masks [34mMatrix[37m::pack()
[31mx[37m [34mpurrr[37m::[32mtranspose()[37m masks [34mdata.table[37m::transpose()
[31mx[37m [34mtidyr[37m::[32munpack()[37m masks [34mMatrix[37m::unpack()[39m
Attaching package: ‘MASS’
The following object is masked from ‘package:dplyr’:
select
The following object is masked from ‘package:plotly’:
select
---------------------
Welcome to dendextend version 1.14.0
Type citation('dendextend') for how to cite the package.
Type browseVignettes(package = 'dendextend') for the package vignette.
The github page is: https://github.com/talgalili/dendextend/
Suggestions and bug-reports can be submitted at: https://github.com/talgalili/dendextend/issues
Or contact: <tal.galili@gmail.com>
To suppress this message use: suppressPackageStartupMessages(library(dendextend))
---------------------
Attaching package: ‘dendextend’
The following object is masked from ‘package:data.table’:
set
The following object is masked from ‘package:stats’:
cutree
To demostrate how to do the gene clustering usign COTAN we begin importing the COTAN object that stores all elaborated data and, in this case, regarding a mouse embrionic cortex dataset (developmental stage E17.5).
input_dir = "Data/"
layers = list("L1"=c("Reln","Lhx5"), "L2/3"=c("Satb2","Cux1"), "L4"=c("Rorb","Sox5") , "L5/6"=c("Bcl11b","Fezf2") , "Prog"=c("Vim","Hes1"))
#objE17 = readRDS(file = paste(input_dir,"E17.5_cortex.cotan.RDS", sep = ""))
objE17 = readRDS(file = paste(input_dir,"E17.5_cortex.cotan.RDS", sep = ""))g.space = get.gene.coexpression.space(objE17,
n.genes.for.marker = 28,#25
primary.markers = unlist(layers))[1] "calculating gene coexpression space: output tanh of reduced coex matrix"
L11 L12 L2/31 L2/32 L41 L42 L5/61 L5/62 Prog1 Prog2
"Reln" "Lhx5" "Satb2" "Cux1" "Rorb" "Sox5" "Bcl11b" "Fezf2" "Vim" "Hes1"
[1] "Get p-values on a set of genes on columns genome wide on rows"
[1] "Using function S"
[1] "function to generate S "
[1] "function to generate S "
g.space = as.data.frame(as.matrix(g.space))
coex.pca.genes <- prcomp(t(g.space),
center = TRUE,
scale. = F)
fviz_eig(coex.pca.genes, addlabels=TRUE,ncp = 10)Hierarchical clustering
# clustering usign Ward method
hc.norm = hclust(dist(g.space), method = "ward.D2")
# and cut the tree into 5 clusters (for example)
cut = cutree(hc.norm, k = 7, order_clusters_as_data = F)
# It crates the tree
dend <- as.dendrogram(hc.norm)
# I can use a dataframe from the pca to store some data regarding the clustering
pca_1 = as.data.frame(coex.pca.genes$rotation[,1:5])
pca_1 = pca_1[order.dendrogram(dend),]
mycolours <- c("genes related to L5/6" = "#3C5488FF","genes related to L2/3"="#F39B7FFF","genes related to Prog"="#4DBBD5FF","genes related to L1"="#E64B35FF","genes related to L4" = "#91D1C2FF", "not marked"="#B09C85FF")
# save the cluster number into the dataframe
pca_1$hclust = cut
dend =branches_color(dend,k=7,col=c("#4DBBD5FF","#91D1C2FF","#F39B7FFF","#E64B35FF","#3C5488FF","#91D1C2FF","#B09C85FF" ),groupLabels = T)
#dend =color_labels(dend,k=5,labels = rownames(pca_1),col=pca_1$colors)
# plot the dendrogram with all anmes for secondary markers
dend %>%
set("labels", ifelse(labels(dend) %in% rownames(pca_1)[rownames(pca_1) %in% colnames(as.matrix(g.space))], labels(dend), "")) %>%
# set("branches_k_color", value = c("gray80","#4DBBD5FF","#91D1C2FF" ,"gray80","#F39B7FFF","#E64B35FF","#3C5488FF"), k = 7) %>%
plot(horiz=F, axes=T)#,ylim = c(0,80))or just with primary markers
dend %>%
set("labels", ifelse(labels(dend) %in% rownames(pca_1)[rownames(pca_1) %in% unlist(layers)], labels(dend), "")) %>%
set("branches_k_color", value = c("gray80","#4DBBD5FF","#91D1C2FF" ,"gray80","#F39B7FFF","#E64B35FF","#3C5488FF"), k = 7) %>%
plot(horiz=F, axes=T,ylim = c(0,100))Now we can plot the PCA
# some more genes as landmarks
controls =list("genes related to L5/6"=c("Foxp2","Tbr1"), "genes related to L2/3"=c("Mef2c"), "genes related to Prog"=c("Nes","Sox2") , "genes related to L1"=c() , "genes related to L4"=c())
# dataframe to be able to label only primary markers and control genes
textdf <- pca_1[rownames(pca_1) %in% c(unlist(layers),unlist(controls)) , ]
for (m in c(1:length(controls))) {
for (g in controls[[m]]) {
if(g %in% rownames(textdf)){
textdf[g,"highlight"] = names(controls[m])
}
}
}
# deciding the colors
mycolours <- c("genes related to L5/6" = "#3C5488FF","genes related to L2/3"="#F39B7FFF","genes related to Prog"="#4DBBD5FF","genes related to L1"="#E64B35FF","genes related to L4" = "#91D1C2FF", "not marked"="#B09C85FF")
# to assing correcly the cluster number and the color
mycolours2 = c("Reln","Satb2","Rorb","Bcl11b","Vim")
names(mycolours2) = unique(cut[unlist(layers)])
mycolours2[mycolours2 == "Reln"] = "#E64B35FF"
mycolours2[mycolours2 == "Satb2"] = "#F39B7FFF"
mycolours2[mycolours2 == "Rorb"] = "#91D1C2FF"
mycolours2[mycolours2 == "Bcl11b"] = "#3C5488FF"
mycolours2[mycolours2 == "Vim"] = "#4DBBD5FF"
color_to_add = unique(pca_1$hclust)[!unique(pca_1$hclust) %in% as.numeric(names(mycolours2))]
names(color_to_add) = unique(pca_1$hclust)[!unique(pca_1$hclust) %in% as.numeric(names(mycolours2))]
color_to_add[color_to_add %in%
unique(pca_1$hclust)[!unique(pca_1$hclust) %in% as.numeric(names(mycolours2))]] = "#B09C85FF"
mycolours2 = c(mycolours2,color_to_add)
pca1 = ggplot(subset(pca_1,!hclust %in% unique(cut[unlist(layers)]) ), aes(x=PC1, y=PC2)) + geom_point(alpha = 0.3,color = "#B09C85FF",size=1)
#pca2 = pca1 + geom_point(data = subset(pca_1, highlight != "not marked" ), aes(x=PC1, y=PC2, colour=hclust),size=2.5,alpha = 0.9)
pca2 = pca1 + geom_point(data = subset(pca_1, hclust %in% unique(cut[unlist(layers)]) ), aes(x=PC1, y=PC2, colour=as.character(hclust)),size=1,alpha = 0.5) +
scale_color_manual( "Status", values = mycolours2) +
scale_fill_manual( "Status", values = mycolours2) +
xlab("") + ylab("") +
geom_label_repel(data =textdf , aes(x = PC1, y = PC2, label = rownames(textdf),fill=as.character(hclust)),
label.size = NA,
alpha = 0.5,
direction = "both",
na.rm=TRUE,
seed = 1234) +
geom_label_repel(data =textdf , aes(x = PC1, y = PC2, label = rownames(textdf)),
label.size = NA,
segment.color = 'black',
segment.size = 0.5,
direction = "both",
alpha = 1,
na.rm=TRUE,
fill = NA,
seed = 1234) +
ggtitle("PCA") +
theme_light(base_size=10) +
theme(axis.text.x=element_blank(),plot.title = element_text(size=14,
face="italic",
color="#3C5488FF",
hjust=0.01,
lineheight=1.2,margin = margin(t = 5, b = -15)),
axis.text.y=element_blank(),
legend.position = "none") # titl)
pca2 #+ geom_encircle(data = pca_1, aes(group=hclust)) t-SNE code and plot
# run the t-SNE
cl.genes.tsne = Rtsne(g.space ,initial_dims = 100, dims = 2, perplexity=30,eta = 200, verbose=F, max_iter = 3000,theta=0.4,num_threads = 10,pca_center = T, pca_scale = FALSE, normalize = T )
d_tsne_1 = as.data.frame(cl.genes.tsne$Y)
rownames(d_tsne_1) = rownames(g.space)
d_tsne_1 = d_tsne_1[order.dendrogram(dend),]
# save the cluster numebr inside a dataframe with the t-SNE information
d_tsne_1$hclust = cut
d_tsne_1$names = rownames(d_tsne_1)
# as before to label only some genes
textdf <- d_tsne_1[rownames(d_tsne_1) %in% c(unlist(layers),unlist(controls)),]
for (m in c(1:length(controls))) {
for (g in controls[[m]]) {
if(g %in% rownames(textdf)){
textdf[g,"highlight"] = names(controls[m])
}
}
}
p1 = ggplot(subset(d_tsne_1,!hclust %in% unique(cut[unlist(layers)])), aes(x=V1, y=V2)) + geom_point(alpha = 0.3, color = "#B09C85FF", size=1)
p2 = p1 + geom_point(data = subset(d_tsne_1, hclust %in% unique(cut[unlist(layers)]) ), aes(x=V1, y=V2, colour=as.character(hclust)),size=1,alpha = 0.5) +
scale_color_manual("Status", values = mycolours2) +
scale_fill_manual("Status", values = mycolours2) +
xlab("") + ylab("")+
geom_label_repel(data =textdf , aes(x = V1, y = V2, label = names,fill=as.character(hclust)),
label.size = NA,
alpha = 0.5,
direction = "both",
na.rm=TRUE,
seed = 1234) +
geom_label_repel(data =textdf , aes(x = V1, y = V2, label = names),
label.size = NA,
segment.color = 'black',
segment.size = 0.5,
direction = "both",
alpha = 1,
na.rm=TRUE,
fill = NA,
seed = 1234) +
ggtitle("t-SNE") +
theme_light(base_size=10) +
theme(axis.text.x=element_blank(),plot.title = element_text(size=14,
face="italic",
color="#3C5488FF",
hjust=0.01,
lineheight=1.2,margin = margin(t = 5, b = -15)),
axis.text.y=element_blank(),
legend.position = "none") # titl)
p2Code to create an iteractive plot. This can be modified to be used with all the plots.
p = ggplot(d_tsne_1, aes(x=V1, y=V2, text= paste("gene: ",names))) +
geom_point(size=2, aes(colour=as.character(hclust)), alpha=0.8) +
scale_color_manual("Status", values = mycolours2) +
xlab("") + ylab("") +
ggtitle("t-SNE") +
theme_light(base_size=10) +
theme(axis.text.x=element_blank(),
axis.text.y=element_blank())
ggplotly(p)Multidimensional scaling (MDS) and plot
# run the MDS
genes.dist.euc = dist(g.space, method = "euclidean")
#fit <- isoMDS(genes.dist.euc) # not linear
fit <- isoMDS(genes.dist.euc)initial value 12.149889
final value 10.248596
converged
fit.genes = as.data.frame(fit$points)
fit.genes = fit.genes[order.dendrogram(dend),]
fit.genes$hclust = cut
fit.genes$names = rownames(fit.genes)
mycolours3 <- c("cluster L5/6 markers" = "#3C5488FF","cluster L2/3 markers"="#F39B7FFF","cluster Prog markers"="#4DBBD5FF","cluster L1 markers"="#E64B35FF","cluster L4 markers" = "#91D1C2FF", "not identified cluster"="#B09C85FF")
#fit.genes$hclust = factor(cutree(hc.norm, 7))
used = vector()
for (k in c(1:length(layers))) {
#print(k)
tt =as.numeric(cut[layers[[k]]][1])
fit.genes[fit.genes$hclust == tt,"cluster"] = paste("cluster",names(layers[k]),"markers", sep = " " )
used = c(used,cut[layers[[k]]][1])
}
fit.genes[fit.genes$hclust %in% (unique(fit.genes$hclust)[!unique(fit.genes$hclust) %in% used]),]$cluster = "not identified cluster"
textdf <- fit.genes[rownames(fit.genes) %in% c(unlist(layers),unlist(controls)),]
f1 = ggplot(subset(fit.genes,!hclust %in% unique(cut[unlist(layers)]) ), aes(x=V1, y=V2)) + geom_point(alpha = 0.3, color = "#B09C85FF", size=1)
f2 = f1 + geom_point(data = subset(fit.genes, hclust %in% unique(cut[unlist(layers)]) ),
aes(x=V1, y=V2, colour=cluster), size=1,alpha = 0.5) +
scale_color_manual("Status", values = mycolours3) +
scale_fill_manual("Status", values = mycolours3) +
xlab("") + ylab("")+
geom_label_repel(data =textdf , aes(x = V1, y = V2, label = rownames(textdf),fill=cluster),
label.size = NA,
alpha = 0.5,
direction ="both",
na.rm=TRUE,
seed = 1234, show.legend = FALSE) +
geom_label_repel(data =textdf , aes(x = V1, y = V2, label = rownames(textdf)),
label.size = NA,
segment.color = 'black',
segment.size = 0.5,
direction = "both",
alpha = 1,
na.rm=TRUE,
fill = NA,
seed = 1234, show.legend = FALSE) +
ggtitle("MDS") +
theme_light(base_size=10) +
theme(axis.text.x=element_blank(),plot.title = element_text(size=14,
face="italic",
color="#3C5488FF",
hjust=0.01,
lineheight=1.2,margin = margin(t = 5, b = -15)),
axis.text.y=element_blank(),
legend.title = element_blank(),
legend.text = element_text(color = "#3C5488FF",face ="italic" ),
legend.position = "bottom") # titl)
f2 #+ geom_encircle(data = fit.genes, aes(group=`5_clusters`)) R version 4.0.4 (2021-02-15)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 18.04.5 LTS
Matrix products: default
BLAS: /usr/lib/x86_64-linux-gnu/openblas/libblas.so.3
LAPACK: /usr/lib/x86_64-linux-gnu/libopenblasp-r0.2.20.so
locale:
[1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
[5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8 LC_PAPER=en_US.UTF-8 LC_NAME=C
[9] LC_ADDRESS=C LC_TELEPHONE=C LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] dendextend_1.14.0 MASS_7.3-53.1 htmlwidgets_1.5.3 forcats_0.5.1 stringr_1.4.0 dplyr_1.0.4
[7] purrr_0.3.4 readr_1.4.0 tidyr_1.1.2 tibble_3.0.6 tidyverse_1.3.0 plotly_4.9.3
[13] Rtsne_0.15 factoextra_1.0.7 ggrepel_0.9.1 ggplot2_3.3.3 Matrix_1.3-2 data.table_1.13.6
[19] COTAN_0.1.0
loaded via a namespace (and not attached):
[1] readxl_1.3.1 backports_1.2.1 circlize_0.4.12 plyr_1.8.6 igraph_1.2.6
[6] lazyeval_0.2.2 splines_4.0.4 crosstalk_1.1.1 listenv_0.8.0 scattermore_0.7
[11] digest_0.6.27 htmltools_0.5.1.1 viridis_0.5.1 magrittr_2.0.1 tensor_1.5
[16] cluster_2.1.1 ROCR_1.0-11 openxlsx_4.2.3 ComplexHeatmap_2.6.2 globals_0.14.0
[21] modelr_0.1.8 matrixStats_0.58.0 colorspace_2.0-0 rvest_0.3.6 rappdirs_0.3.3
[26] haven_2.3.1 xfun_0.20 crayon_1.4.0 jsonlite_1.7.2 spatstat_1.64-1
[31] spatstat.data_2.0-0 survival_3.2-7 zoo_1.8-8 glue_1.4.2 polyclip_1.10-0
[36] gtable_0.3.0 leiden_0.3.7 GetoptLong_1.0.5 car_3.0-10 future.apply_1.7.0
[41] shape_1.4.5 BiocGenerics_0.36.0 abind_1.4-5 scales_1.1.1 DBI_1.1.1
[46] rstatix_0.7.0 miniUI_0.1.1.1 Rcpp_1.0.6 viridisLite_0.3.0 xtable_1.8-4
[51] clue_0.3-58 reticulate_1.18 foreign_0.8-81 latex2exp_0.4.0 stats4_4.0.4
[56] httr_1.4.2 RColorBrewer_1.1-2 ellipsis_0.3.1 Seurat_4.0.0 ica_1.0-2
[61] pkgconfig_2.0.3 farver_2.0.3 sass_0.3.1 uwot_0.1.10 dbplyr_2.1.0
[66] deldir_0.2-10 tidyselect_1.1.0 labeling_0.4.2 rlang_0.4.10 reshape2_1.4.4
[71] later_1.1.0.1 cellranger_1.1.0 munsell_0.5.0 tools_4.0.4 cli_2.3.0
[76] generics_0.1.0 broom_0.7.5 ggridges_0.5.3 evaluate_0.14 fastmap_1.1.0
[81] yaml_2.2.1 goftest_1.2-2 fs_1.5.0 knitr_1.31 fitdistrplus_1.1-3
[86] zip_2.1.1 RANN_2.6.1 pbapply_1.4-3 future_1.21.0 nlme_3.1-152
[91] mime_0.9 xml2_1.3.2 rstudioapi_0.13 compiler_4.0.4 curl_4.3
[96] filelock_1.0.2 png_0.1-7 ggsignif_0.6.1 spatstat.utils_2.0-0 reprex_1.0.0
[101] bslib_0.2.4 stringi_1.5.3 basilisk.utils_1.2.2 lattice_0.20-41 vctrs_0.3.6
[106] pillar_1.4.7 lifecycle_0.2.0 lmtest_0.9-38 jquerylib_0.1.3 GlobalOptions_0.1.2
[111] RcppAnnoy_0.0.18 cowplot_1.1.1 irlba_2.3.3 httpuv_1.5.5 patchwork_1.1.1
[116] R6_2.5.0 promises_1.1.1 rio_0.5.16 KernSmooth_2.23-18 gridExtra_2.3
[121] IRanges_2.24.1 parallelly_1.23.0 codetools_0.2-18 assertthat_0.2.1 rjson_0.2.20
[126] withr_2.4.1 SeuratObject_4.0.0 sctransform_0.3.2 S4Vectors_0.28.1 mgcv_1.8-33
[131] parallel_4.0.4 hms_1.0.0 grid_4.0.4 rpart_4.1-15 basilisk_1.2.1
[136] rmarkdown_2.7 carData_3.0-4 Cairo_1.5-12.2 ggpubr_0.4.0 shiny_1.6.0
[141] lubridate_1.7.9.2